In [1]:
# Imports — deduplicated (the original imported numpy three times and
# matplotlib.pyplot twice) and grouped stdlib-first.
import cmath
import math
import time

import librosa
from librosa import display
from PIL import Image
from matplotlib import pyplot
import matplotlib.pyplot as plt
import numpy
import numpy as np
from numpy import asarray
from numpy.linalg import inv
import scipy
import seaborn as sns
import IPython.display as ipd
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Loading the clean male training audio (train_clean_male.wav)

In [2]:
# Load the clean training speech at its native sampling rate and take its
# 1024-point STFT (hop 512 -> 513 frequency bins per frame).
train_clean_male, sr1 = librosa.load("train_clean_male.wav",sr=None)
S=librosa.stft(train_clean_male,n_fft=1024,hop_length=512)
# NOTE(review): playback rate is hardcoded to 16000 — presumably the file's
# native rate; confirm against sr1.
ipd.display(ipd.Audio(train_clean_male,rate=16000))

Loading the noisy ("dirty") male training audio (train_dirty_male.wav)

In [3]:
# Load the noisy ("dirty") training speech; X keeps the complex STFT so the
# noisy phase can be reused when reconstructing the denoised waveform.
sn,sr2=librosa.load("train_dirty_male.wav",sr=None)
X=librosa.stft(sn,n_fft=1024,hop_length=512)
# NOTE(review): playback rate hardcoded to 16000 — confirm against sr2.
ipd.display(ipd.Audio(sn,rate=16000))

Taking Magnitude of Clean_Male_Voice and Dirty_Male_Voice

In [4]:
# Magnitude spectrograms: |S| is the training target, |X| the noisy input.
mod_S=np.abs(S)
mod_X=np.abs(X)

Prepending 19 silent frames at the beginning of the dirty male voice spectrogram

In [5]:
# Prepend 19 all-zero "silent" frames so that every real frame — including
# the very first one — has a full 20-frame context window ending at it.
appended_input_X = np.zeros((513, 19))
concatenated_mod_X = np.hstack((appended_input_X, mod_X))
# Show both shapes: target has 2459 frames, padded input 2478.
np.shape(mod_S), np.shape(concatenated_mod_X)
Out[5]:
((513, 2459), (513, 2478))

Model Creation

The Model Architecture is as follows:

1) Convolution layer with kernel size = 2, stride = 1 and 8 filters, followed by ReLU activation.

2) Average Pooling with kernel size=2 and stride =1

3) Convolution layer with kernel size = 2, stride = 1 and 16 filters, followed by ReLU activation.

4) Average Pooling with kernel size=2 and stride =1

5) Convolution layer with kernel size = 2, stride = 2 and 32 filters, followed by ReLU activation.

6) Average Pooling with kernel size=2 and stride =2

7) Flattening the feature maps into a fully connected input of 16,256 features

8) Two fully connected layers with 8000 and 1000 neurons, each with ReLU activation.

9) The final_output layer has 513 neurons as expected in the Output

In [6]:
class twodcnn(nn.Module):
    """2-D CNN speech denoiser.

    Input:  (batch, 1, 20, 513) — a sliding window of 20 consecutive
            noisy magnitude-spectrogram frames (513 frequency bins each).
    Output: (batch, 513) — the predicted clean magnitude frame.

    With a 20x513 input the conv stack produces a (32, 4, 127) feature map,
    i.e. 32 * 4 * 127 = 16256 flattened features feeding the FC head.
    """

    def __init__(self):
        super(twodcnn, self).__init__()

        self.conv = nn.Sequential(
                # (1, 20, 513) -> (8, 19, 512) -> pool -> (8, 18, 511)
                nn.Conv2d(1, 8, kernel_size=2, stride=1),
                nn.ReLU(),
                nn.AvgPool2d(kernel_size=2, stride=1),

                # (8, 18, 511) -> (16, 17, 510) -> pool -> (16, 16, 509)
                nn.Conv2d(8, 16, kernel_size=2, stride=1),
                nn.ReLU(),
                nn.AvgPool2d(kernel_size=2, stride=1),

                # (16, 16, 509) -> (32, 8, 254) -> pool -> (32, 4, 127)
                nn.Conv2d(16, 32, kernel_size=2, stride=2),
                nn.ReLU(),
                nn.AvgPool2d(kernel_size=2, stride=2))

        # Fully connected head: 16256 -> 8000 -> 1000 -> 513 output bins.
        self.fc1 = nn.Linear(in_features=16256, out_features=8000)
        self.fc2 = nn.Linear(in_features=8000, out_features=1000)
        self.fc3 = nn.Linear(in_features=1000, out_features=513)
        self.activation = nn.ReLU()
        # (The original defined an unused nn.Dropout(0.3) that was never
        # applied in forward(); it has been removed as dead code.)

    def forward(self, data):
        """Map a batch of 20-frame windows to clean magnitude frames."""
        conv_output = self.conv(data)
        # Flatten all feature maps into one vector per batch element.
        output_flat = conv_output.reshape(conv_output.shape[0], -1)
        fc1_output = self.activation(self.fc1(output_flat))
        fc2_output = self.activation(self.fc2(fc1_output))
        # Final ReLU keeps outputs non-negative, matching magnitude targets.
        final_output = self.activation(self.fc3(fc2_output))
        return final_output

Initializing the Training Model

In [7]:
# Move the model to the configured device; unlike .cuda(), .to(device)
# does not crash on CPU-only machines (device falls back to 'cpu').
model1 = twodcnn().to(device)
# Loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.0015)

Creating Frames

In [8]:
def creating_frames(mod_X, context=20):
  """Slice a magnitude spectrogram into overlapping context windows.

  Parameters
  ----------
  mod_X : np.ndarray, shape (n_bins, n_frames)
      Magnitude spectrogram (assumed already padded with context-1 leading
      silent frames by the caller).
  context : int, default 20
      Number of consecutive frames per window (default matches the
      original hard-coded 20-frame window).

  Returns
  -------
  torch.FloatTensor of shape (n_frames - context + 1, 1, context, n_bins),
  one single-channel "image" per output frame.
  """
  n_bins = mod_X.shape[0]
  k = mod_X.shape[1] - (context - 1)
  # Each window is transposed to (context, n_bins) so time runs down rows.
  frames = [np.transpose(mod_X[:, j:j + context]) for j in range(k)]
  # Stack once in numpy before converting — avoids the slow (and warned-
  # about) tensor construction from a Python list of arrays.
  stacked = np.stack(frames).astype(np.float32)
  return torch.from_numpy(stacked).reshape(k, 1, context, n_bins)

Function For SNR

In [9]:
def snr(ground_clean, recovered_one):
  """Signal-to-noise ratio in dB between a reference and a recovered signal.

  The two signals are truncated to their common length, then
  SNR = 10 * log10( sum(ground^2) / sum((ground - recovered)^2) ).
  (The redundant function-local `import math` was removed; math is
  imported at the top of the notebook.)

  Raises ZeroDivisionError / math domain error if the signals are
  identical (zero noise power), as in the original implementation.
  """
  n = min(len(ground_clean), len(recovered_one))

  ground = ground_clean[0:n]
  recovered = recovered_one[0:n]
  signal_power = np.sum(np.square(ground))
  noise_power = np.sum(np.square(ground - recovered))
  return 10 * math.log10(signal_power / noise_power)

Creating Input Data

In [10]:
# Build the (2459, 1, 20, 513) stack of 20-frame input windows.
new_data2=creating_frames(concatenated_mod_X)
In [11]:
# Sanity-check the training input tensor shape.
print(np.shape(new_data2))
torch.Size([2459, 1, 20, 513])
In [12]:
# Train for 200 epochs of minibatch SGD (Adam). Batch count is derived from
# the data instead of the original hard-coded 20/2459, and tensors are moved
# with .to(device) so the loop also runs on CPU-only machines.
EPOCHS = 200
BATCH_SIZE = 128
n_frames = new_data2.shape[0]                   # 2459 training windows
n_batches = math.ceil(n_frames / BATCH_SIZE)    # 20 minibatches per epoch
errt = [0.0] * EPOCHS                           # per-epoch mean loss (for the convergence plot)
for epoch in range(EPOCHS):
    running_loss = 0.0
    for j in range(n_batches):
        start = j * BATCH_SIZE
        end = min(start + BATCH_SIZE, n_frames)  # last batch may be short

        images = new_data2[start:end].to(device)
        # Target: clean-magnitude columns for this batch, shape (513, batch).
        labels = torch.as_tensor(mod_S[:, start:end], device=device)

        # Forward pass (labels transposed to (batch, 513) to match outputs).
        outputs = model1(images)
        loss = criterion(outputs, torch.transpose(labels, 0, 1))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    errt[epoch] = running_loss / n_batches
    print("Epoch:", epoch, "Loss:", running_loss / n_batches)
Epoch: 0 Loss: 0.26442752350121734
Epoch: 1 Loss: 0.0882756377570331
Epoch: 2 Loss: 0.08685475084930658
Epoch: 3 Loss: 0.08626806028187275
Epoch: 4 Loss: 0.08348528374917805
Epoch: 5 Loss: 0.07831282261759043
Epoch: 6 Loss: 0.07289079590700567
Epoch: 7 Loss: 0.07234358992427588
Epoch: 8 Loss: 0.06811073422431946
Epoch: 9 Loss: 0.06491691065020859
Epoch: 10 Loss: 0.06304459874518216
Epoch: 11 Loss: 0.06409234707243741
Epoch: 12 Loss: 0.0615347885992378
Epoch: 13 Loss: 0.05735531547106802
Epoch: 14 Loss: 0.05594591391272843
Epoch: 15 Loss: 0.06161702135577798
Epoch: 16 Loss: 0.06194683262147009
Epoch: 17 Loss: 0.053910128632560374
Epoch: 18 Loss: 0.04986663600429893
Epoch: 19 Loss: 0.047678174334578216
Epoch: 20 Loss: 0.045366938295774165
Epoch: 21 Loss: 0.04292101864703
Epoch: 22 Loss: 0.0414094977080822
Epoch: 23 Loss: 0.04066494253929705
Epoch: 24 Loss: 0.03898681662976742
Epoch: 25 Loss: 0.037085762503556906
Epoch: 26 Loss: 0.03450380498543382
Epoch: 27 Loss: 0.03319090283475816
Epoch: 28 Loss: 0.03349574585445225
Epoch: 29 Loss: 0.0369317242410034
Epoch: 30 Loss: 0.030482356529682876
Epoch: 31 Loss: 0.027003182237967847
Epoch: 32 Loss: 0.024597178236581384
Epoch: 33 Loss: 0.023426392301917075
Epoch: 34 Loss: 0.023165845265612005
Epoch: 35 Loss: 0.02261579924961552
Epoch: 36 Loss: 0.02221194232115522
Epoch: 37 Loss: 0.020125264371745288
Epoch: 38 Loss: 0.01993126035667956
Epoch: 39 Loss: 0.019894581171683967
Epoch: 40 Loss: 0.019425220205448568
Epoch: 41 Loss: 0.01825215443968773
Epoch: 42 Loss: 0.02099684360437095
Epoch: 43 Loss: 0.02024604956386611
Epoch: 44 Loss: 0.02162725639063865
Epoch: 45 Loss: 0.021412726119160654
Epoch: 46 Loss: 0.019834780157543717
Epoch: 47 Loss: 0.019971601222641765
Epoch: 48 Loss: 0.01930945282801986
Epoch: 49 Loss: 0.019118818698916584
Epoch: 50 Loss: 0.02070634193951264
Epoch: 51 Loss: 0.022272931551560758
Epoch: 52 Loss: 0.01803034698823467
Epoch: 53 Loss: 0.01493784161284566
Epoch: 54 Loss: 0.01259160382905975
Epoch: 55 Loss: 0.011962436267640441
Epoch: 56 Loss: 0.012250238633714616
Epoch: 57 Loss: 0.012752492411527783
Epoch: 58 Loss: 0.016017926926724612
Epoch: 59 Loss: 0.017039052746258675
Epoch: 60 Loss: 0.018960614199750127
Epoch: 61 Loss: 0.019428677251562478
Epoch: 62 Loss: 0.01665950723690912
Epoch: 63 Loss: 0.014537478296551854
Epoch: 64 Loss: 0.01327712417114526
Epoch: 65 Loss: 0.010746616579126567
Epoch: 66 Loss: 0.009239514725049958
Epoch: 67 Loss: 0.00929628238081932
Epoch: 68 Loss: 0.009139653173042462
Epoch: 69 Loss: 0.01061352962278761
Epoch: 70 Loss: 0.01084669198608026
Epoch: 71 Loss: 0.0116127276327461
Epoch: 72 Loss: 0.013669400266371668
Epoch: 73 Loss: 0.014572509890422225
Epoch: 74 Loss: 0.014557301416061819
Epoch: 75 Loss: 0.010537125717382878
Epoch: 76 Loss: 0.011276641767472029
Epoch: 77 Loss: 0.010519491677405313
Epoch: 78 Loss: 0.010697968729073182
Epoch: 79 Loss: 0.009456937911454587
Epoch: 80 Loss: 0.009749745042063296
Epoch: 81 Loss: 0.009542026615235955
Epoch: 82 Loss: 0.010611352528212593
Epoch: 83 Loss: 0.00963060709182173
Epoch: 84 Loss: 0.00907372108194977
Epoch: 85 Loss: 0.008057017641840502
Epoch: 86 Loss: 0.007556762994499877
Epoch: 87 Loss: 0.008372457238147035
Epoch: 88 Loss: 0.00703418820630759
Epoch: 89 Loss: 0.008177722012624145
Epoch: 90 Loss: 0.008025166409788653
Epoch: 91 Loss: 0.010722543712472544
Epoch: 92 Loss: 0.007703009399119764
Epoch: 93 Loss: 0.008222971711074934
Epoch: 94 Loss: 0.007605951261939481
Epoch: 95 Loss: 0.009522023668978363
Epoch: 96 Loss: 0.007125475106295198
Epoch: 97 Loss: 0.007417198043549433
Epoch: 98 Loss: 0.0064475649909581986
Epoch: 99 Loss: 0.007539080461720004
Epoch: 100 Loss: 0.0059935541416052725
Epoch: 101 Loss: 0.00661188542144373
Epoch: 102 Loss: 0.005966265563620255
Epoch: 103 Loss: 0.007408985873917117
Epoch: 104 Loss: 0.00601165410480462
Epoch: 105 Loss: 0.0075563103659078475
Epoch: 106 Loss: 0.006816565844928846
Epoch: 107 Loss: 0.008915602223714814
Epoch: 108 Loss: 0.005931321642128751
Epoch: 109 Loss: 0.006484872818691656
Epoch: 110 Loss: 0.005953775672242046
Epoch: 111 Loss: 0.007484158826991915
Epoch: 112 Loss: 0.006016464059939608
Epoch: 113 Loss: 0.006459112209267914
Epoch: 114 Loss: 0.0064380435331258925
Epoch: 115 Loss: 0.007014576910296455
Epoch: 116 Loss: 0.007233427162282169
Epoch: 117 Loss: 0.0063586376723833386
Epoch: 118 Loss: 0.005750777007779107
Epoch: 119 Loss: 0.005333074467489496
Epoch: 120 Loss: 0.005888195033185184
Epoch: 121 Loss: 0.00488993659382686
Epoch: 122 Loss: 0.004882443603128195
Epoch: 123 Loss: 0.0045957956492202355
Epoch: 124 Loss: 0.005169743491569534
Epoch: 125 Loss: 0.00492308858956676
Epoch: 126 Loss: 0.006581641244702041
Epoch: 127 Loss: 0.005831739649875089
Epoch: 128 Loss: 0.006521212606457993
Epoch: 129 Loss: 0.005263538181316108
Epoch: 130 Loss: 0.0050267052109120415
Epoch: 131 Loss: 0.006138919829390943
Epoch: 132 Loss: 0.006530143291456625
Epoch: 133 Loss: 0.008817223767982796
Epoch: 134 Loss: 0.0058738471125252545
Epoch: 135 Loss: 0.005852454254636541
Epoch: 136 Loss: 0.007441277464386076
Epoch: 137 Loss: 0.0077826140623074025
Epoch: 138 Loss: 0.009903433104045688
Epoch: 139 Loss: 0.0065744957653805615
Epoch: 140 Loss: 0.0056598561350256205
Epoch: 141 Loss: 0.006486870697699487
Epoch: 142 Loss: 0.007994659146061168
Epoch: 143 Loss: 0.00905696566333063
Epoch: 144 Loss: 0.007762249413644895
Epoch: 145 Loss: 0.007257559068966657
Epoch: 146 Loss: 0.007735501235583797
Epoch: 147 Loss: 0.007118309836369008
Epoch: 148 Loss: 0.004993385495617985
Epoch: 149 Loss: 0.0045630684675415974
Epoch: 150 Loss: 0.005370798514923081
Epoch: 151 Loss: 0.007665369450114668
Epoch: 152 Loss: 0.007539820543024689
Epoch: 153 Loss: 0.006311351869953796
Epoch: 154 Loss: 0.006375807581935078
Epoch: 155 Loss: 0.007741044444264844
Epoch: 156 Loss: 0.006497785577084869
Epoch: 157 Loss: 0.006126189517090097
Epoch: 158 Loss: 0.0053366555366665125
Epoch: 159 Loss: 0.006332615640712902
Epoch: 160 Loss: 0.006896345329005271
Epoch: 161 Loss: 0.0074586593662388624
Epoch: 162 Loss: 0.0059591172554064546
Epoch: 163 Loss: 0.006793727760668844
Epoch: 164 Loss: 0.009534394409274682
Epoch: 165 Loss: 0.007630280370358377
Epoch: 166 Loss: 0.005833707525744103
Epoch: 167 Loss: 0.004796346375951543
Epoch: 168 Loss: 0.006748996576061472
Epoch: 169 Loss: 0.004851504089310765
Epoch: 170 Loss: 0.005249348486540839
Epoch: 171 Loss: 0.00453921593434643
Epoch: 172 Loss: 0.005911485198885203
Epoch: 173 Loss: 0.005215132492594421
Epoch: 174 Loss: 0.005112008325522765
Epoch: 175 Loss: 0.005678609281312674
Epoch: 176 Loss: 0.005928449903149158
Epoch: 177 Loss: 0.007037953671533615
Epoch: 178 Loss: 0.0063450904621277
Epoch: 179 Loss: 0.006336094625294208
Epoch: 180 Loss: 0.005560226290253922
Epoch: 181 Loss: 0.004900987690780312
Epoch: 182 Loss: 0.003833544993540272
Epoch: 183 Loss: 0.003640691199689172
Epoch: 184 Loss: 0.0031888372730463745
Epoch: 185 Loss: 0.00311059704690706
Epoch: 186 Loss: 0.0030682071519549936
Epoch: 187 Loss: 0.0035812151822028683
Epoch: 188 Loss: 0.0036303306056652217
Epoch: 189 Loss: 0.0038503261224832387
Epoch: 190 Loss: 0.00486928422760684
Epoch: 191 Loss: 0.004638447138131596
Epoch: 192 Loss: 0.005580971785821021
Epoch: 193 Loss: 0.005918320856289938
Epoch: 194 Loss: 0.006181127083254978
Epoch: 195 Loss: 0.005681899579940364
Epoch: 196 Loss: 0.004935385333374143
Epoch: 197 Loss: 0.006121322035323828
Epoch: 198 Loss: 0.0046310659876326096
Epoch: 199 Loss: 0.005533859157003463
In [13]:
# Convergence plot of the per-epoch training loss. Axis labels added so the
# figure stands alone; plt.show() suppresses the Text(...) repr output.
plt.figure()
plt.plot(errt)
plt.title('Convergence Plot of Error')
plt.xlabel('Epoch')
plt.ylabel('Mean MSE loss')
plt.show()
Out[13]:
Text(0.5, 1.0, 'Convergence Plot of Error')

Predicting the Signal for the Training Data

In [14]:
# Denoise the full training input with the trained network; no_grad avoids
# building the autograd graph, and .to(device) works on CPU-only machines.
with torch.no_grad():
  new_outputs=model1(new_data2.to(device))
In [15]:
# One 513-bin magnitude frame predicted per input window.
print(np.shape(new_outputs))
torch.Size([2459, 513])
In [16]:
# Transpose to (513, 2459) so columns line up with the STFT frames of X.
train_signal=torch.transpose(new_outputs,0,1)
print(train_signal)
tensor([[0.0128, 0.0128, 0.0161,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0034],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')

Recovered Signal for Training Signal

In [17]:
# Re-attach the noisy phase: (X / |X|) is the unit-magnitude phase, scaled by
# the predicted clean magnitudes. The complex STFT tensor is built once
# instead of twice as in the original.
# NOTE(review): bins where |X| == 0 would produce NaN/Inf here — assumed not
# to occur in this recording; confirm before reuse.
X_t = torch.tensor(X, device=device)
new_output_train = torch.div(torch.mul(X_t, train_signal), torch.abs(X_t))
recovered_train = (new_output_train.data).cpu().numpy()
signal_train = librosa.core.istft(recovered_train, hop_length=512)
# Reuse the already-computed waveform instead of running istft a second time.
ipd.display(ipd.Audio(signal_train, rate=16000))

SNR Calculation for Training Data

In [18]:
# SNR (dB) of the denoised training output against the clean reference.
snr(train_clean_male,signal_train)
Out[18]:
12.782286760875671
In [19]:
# Save the denoised training waveform.
# NOTE(review): librosa.output.write_wav was deprecated and removed in
# librosa >= 0.8 (soundfile.write is the replacement) — confirm the
# installed librosa version still supports this call.
librosa.output.write_wav( "train_signal.wav",signal_train, sr=16000, norm=False)

Uploading Test_x_01 signal

In [20]:
# Load the first noisy test clip at its native sampling rate.
# NOTE(review): this rebinds sr2 (previously the dirty-train rate), and the
# playback rate is hardcoded to 16000 — confirm against the loaded rate.
test_x_01,sr2=librosa.load("test_x_01.wav",sr=None)
ipd.display(ipd.Audio(test_x_01,rate=16000))
In [21]:
# STFT of test clip 1, its magnitudes, and 19 leading all-zero frames so the
# sliding 20-frame context window exists for the very first real frame.
testx01 = librosa.stft(test_x_01, n_fft=1024, hop_length=512)
testx01_abs = np.abs(testx01)
testx01_silent_frames = np.hstack((np.zeros((513, 19)), testx01_abs))
print(np.shape(testx01_abs))
print(np.shape(testx01_silent_frames))
(513, 142)
(513, 161)
In [22]:
# Windowed model input for test clip 1: (142, 1, 20, 513).
input_data_test_x_01=creating_frames(testx01_silent_frames)
In [23]:
# Sanity-check the test-1 input tensor shape.
print(np.shape(input_data_test_x_01))
torch.Size([142, 1, 20, 513])
In [24]:
# Denoise test clip 1 (no gradient tracking needed at inference time).
# .to(device) replaces the original double .cuda() calls and also runs on CPU.
with torch.no_grad():
  new_outputs_test_x_01 = model1(input_data_test_x_01.to(device))
print(np.shape(new_outputs_test_x_01))
torch.Size([142, 513])

Recovered Signal for Test Signal 1

In [25]:
# Re-attach the noisy phase of test clip 1 and invert back to a waveform.
new_outputs1_test_x_01 = torch.transpose(new_outputs_test_x_01, 0, 1)
# Build the complex STFT tensor once (the original constructed it twice).
# NOTE(review): |testx01| == 0 bins would yield NaN here — confirm none occur.
X1_t = torch.tensor(testx01, device=device)
new_output2 = torch.div(torch.mul(X1_t, new_outputs1_test_x_01), torch.abs(X1_t))

recovered_test_01_x = (new_output2.data).cpu().numpy()
signal_test_01_x = librosa.core.istft(recovered_test_01_x, hop_length=512)
# Reuse the already-computed waveform instead of a second istft call.
ipd.display(ipd.Audio(signal_test_01_x, rate=16000))

Uploading Test Signal test_x_02

In [26]:
# Load the second noisy test clip at its native sampling rate.
# NOTE(review): playback rate hardcoded to 16000 — confirm against sr2.
test_x_02,sr2=librosa.load("test_x_02.wav",sr=None)
ipd.display(ipd.Audio(test_x_02,rate=16000))
In [27]:
# STFT of test clip 2, its magnitudes, and 19 leading all-zero frames so the
# sliding 20-frame context window exists for the very first real frame.
testx02 = librosa.stft(test_x_02, n_fft=1024, hop_length=512)
testx02_abs = np.abs(testx02)
testx02_silent_frames = np.hstack((np.zeros((513, 19)), testx02_abs))
print(np.shape(testx02_abs))
print(np.shape(testx02_silent_frames))
(513, 380)
(513, 399)
In [28]:
# Windowed model input for test clip 2: (380, 1, 20, 513).
input_data_test_x_02=creating_frames(testx02_silent_frames)
In [29]:
# Sanity-check the test-2 input tensor shape.
print(np.shape(input_data_test_x_02))
torch.Size([380, 1, 20, 513])
In [30]:
# Denoise test clip 2 (no gradient tracking needed at inference time).
# .to(device) replaces the original double .cuda() calls and also runs on CPU.
with torch.no_grad():
  new_outputs_test_x_02 = model1(input_data_test_x_02.to(device))
print(np.shape(new_outputs_test_x_02))
torch.Size([380, 513])

Recovered Signal for test_x_02

In [31]:
# Re-attach the noisy phase of test clip 2 and invert back to a waveform.
new_outputs1_test_x_02 = torch.transpose(new_outputs_test_x_02, 0, 1)
# Build the complex STFT tensor once (the original constructed it twice).
# NOTE(review): |testx02| == 0 bins would yield NaN here — confirm none occur.
X2_t = torch.tensor(testx02, device=device)
new_output12 = torch.div(torch.mul(X2_t, new_outputs1_test_x_02), torch.abs(X2_t))

recovered_test_02_x = (new_output12.data).cpu().numpy()
signal_test_02_x = librosa.core.istft(recovered_test_02_x, hop_length=512)
# Reuse the already-computed waveform instead of a second istft call.
ipd.display(ipd.Audio(signal_test_02_x, rate=16000))